Introduction

The report describes a limma analysis of melanoma samples from Depmap TPM bulk RNAseq gene expression data, and creates lists of DE genes between the average gene expression in primary tumor samples versus average gene expression in various metastatic organs (e.g. lung, liver, etc.).

Background

Questions that we would like to answer with the Depmap data TPM RNAseq data:

  1. What is the difference between cell lines with tropism in liver versus lungs?
  2. Which are the top genes in each metastatic-colonized organ, based on the cell line status?
  3. Find genes unique in metastatic cell lines (for each metastatic organ) versus primary tumor cell lines?
  4. Can we identify gene regulatory networks specific for each organ and across organs? (to do later, in a following report)

Data Preparation

We have pre-downloaded and semi-processed the Depmap melanoma metadata and TPM count matrix, which we load here below:

readRDS("./data/TPM_22Q2_mat.rds") %>%
  dplyr::select(all_of(sort(names(.)))) %>%
  as.data.frame() -> TPM_22Q2_mat
# View(TPM_22Q2_mat)

readxl::read_excel("./data/depmap_melanoma_cell_lines_metadata_2025-06-02.xlsx") %>%
  dplyr::filter(depmap_id %in% names(TPM_22Q2_mat)) %>%
  dplyr::arrange(depmap_id) %>%
  dplyr::select(-c(## remove useless columns
    aliases, source, cosmic_id, additional_info, RRID, WTSI_master_cell_ID,
    lineage_molecular_subtype, model_manipulation, model_manipulation_details,
    sanger_id, default_growth_pattern, lineage_sub_subtype, parent_depmap_id,
    patient_id, Cellosaurus_NCIt_id)) %>%     
  dplyr::mutate(across(primary_or_metastasis, ~ ifelse(is.na(.), "Unknown", .)),
                met_site_group = factor(ifelse(primary_or_metastasis == "Primary",
                                 "primary", tolower(sample_collection_site)))) %>%
  as.data.frame() -> metadata
# View(metadata)

## uncomment if you want to save the converted metadata table
# writexl::write_xlsx(x = metadata,
#                     path = "./data/metadata_clean.xlsx")

metadata %>% 
  DT::datatable()

Preliminary Visualizations

Donut Plot

Visualization of number of metastatic samples by organ sample site:

# Summarize counts
metadata %>%
  dplyr::filter(primary_or_metastasis == "Metastasis") %>%
  count(sample_collection_site) %>%
  arrange(desc(sample_collection_site)) %>%
  mutate(prop = n / sum(n) * 100,
         ypos = cumsum(prop) - 0.5 * prop) -> df_counts

# Create donut plot
df_counts %>%
  ggplot(aes(x = 2, y = prop, fill = sample_collection_site)) +
    geom_bar(stat = "identity", width = 1, color = "black") +
    coord_polar(theta = "y") +
    xlim(0.5, 2.5) +
    theme_void() +
    geom_text_repel(aes(y = ypos, label = paste0(
      sample_collection_site, "\n", round(prop, 1), "%")), color = "black", size = 4) +
    ggtitle("Distribution of Sample Collection Sites (Metastatic Samples Only)") +
    theme(plot.title = element_text(hjust = 0.5),
          legend.position = "none")

Bar Plot

Visualization of number of metastatic samples by organ sample site:

metadata %>%
  dplyr::filter(primary_or_metastasis == "Metastasis") %>%
  count(sample_collection_site) %>%
  as.data.frame() -> metadata_summary

# Create bar plot
metadata_summary %>%
ggplot(aes(x = sample_collection_site, y = n, fill = sample_collection_site)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Distribution of Sample Collection Sites (Metastatic Samples Only)",
    x = "Sample Type", y = "Count") +
  theme_classic() +
  theme(legend.position = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust=1))

Limma Expression Analysis

NOTE: “primary tumor” samples are the “normative” condition meaning that all genes with positive LFC are highter in this condition and genes with negative LFC are higher in the experimental conditions (e.g. lung).

Create the design matrix

This gives you a design matrix with one column per group, with no intercept (~ 0 +) so contrasts are direct.

design <- model.matrix(~ 0 + met_site_group, data = metadata)
colnames(design) <- levels(metadata$met_site_group)

Create a contrast matrix

Now define contrasts to compare metastatic groups to the primary group.

contrast.matrix <- makeContrasts(
  skin_vs_primary = skin - primary,
  lung_vs_primary = lung - primary,
  liver_vs_primary = liver - primary,
  lymph_node_vs_primary = lymph_node - primary,
  pleural_effusion_vs_primary = pleural_effusion - primary,
  central_nervous_system_vs_primary = central_nervous_system - primary,
  ascites_vs_primary = ascites - primary,
  fibroblast_vs_primary = fibroblast - primary,
  uvea_vs_primary = uvea - primary,
  sinonasal_vs_primary = sinonasal - primary,
  levels = design)

Filter out low count genes because they add to the noise of the data

log_tpm <- TPM_22Q2_mat
keep <- rowMeans(2^log_tpm - 1) > 1  # Back-transform and filter
log_tpm <- log_tpm[keep, ]

Fit Model

# Fit linear model directly to log2(TPM+1)
fit <- lmFit(log_tpm, design)
fit2 <- contrasts.fit(fit, contrast.matrix)

# Empirical Bayes moderation
fit2 <- eBayes(fit2)

# Extract top differentially expressed genes
skin_de <- topTable(fit2, coef = "skin_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
lung_de <- topTable(fit2, coef = "lung_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
liver_de <- topTable(fit2, coef = "liver_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
lymph_de <- topTable(fit2, coef = "lymph_node_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
pe_de <- topTable(fit2, coef = "pleural_effusion_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
cns_de <- topTable(fit2, coef = "central_nervous_system_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
ascite_de <- topTable(fit2, coef = "ascites_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
fibro_de <- topTable(fit2, coef = "fibroblast_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
uvea_de <- topTable(fit2, coef = "uvea_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")
sinonasal_de <- topTable(fit2, coef = "sinonasal_vs_primary", adjust.method = "BH", number = nrow(log_tpm)) %>% tibble::rownames_to_column(var = "gene_name")

Volcano Plots

A Volcano plot (log2 fold change vs -log10 p-value) helps identify genes that display large magnitude changes and high significance.

Primary Tumor vs Skin Metastases

skin_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[1])))

Primary Tumor vs Lung Metastases

lung_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[2])))

Primary Tumor vs Liver Metastases

liver_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[3])))

Primary Tumor vs Lymph Node Metastases

lymph_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[4])))

Primary Tumor vs Pleural Effusion Metastases

pe_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[5])))

Primary Tumor vs Central Nervus System Metastases

cns_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[6])))

Primary Tumor vs Ascite Metastases

ascite_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[7])))

Primary Tumor vs Fibroblast Metastases

fibro_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[8])))

Primary Tumor vs Uvea Metastases

uvea_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[9])))

Primary Tumor vs Sinonasal Metastases

sinonasal_de %>%
  dplyr::filter(!is.na(P.Value)) %>%
  dplyr::mutate(
    negLogP = -log10(P.Value),
    sig = ifelse(adj.P.Val < 0.05 & abs(logFC) > 1,
                 "Significant", "Not Significant")) -> volcano_data
volcano_data %>%
  ggplot(aes(x = logFC, y = negLogP, color = sig)) +
  geom_point(size = 0.75, alpha = 0.75) +
  geom_text_repel(data = subset(volcano_data, sig == "Significant"),
                  aes(label = gene_name), size = 3, max.overlaps = Inf,
                  box.padding = 0.3, point.padding = 0.3, segment.color = "grey50") +
  scale_color_manual(values = c("Significant" = "red", "Not Significant" = "black")) +
  geom_hline(yintercept = -log10(0.05), color = "blue", linetype = "dashed") +
  geom_vline(xintercept = c(-1, 1), color = "blue", linetype = "dashed") +
  theme_bw() +
  ggtitle(paste0("Volcano Plot ", gsub("_", " ", names(as.data.frame(contrast.matrix))[10])))

Save Results

We save the results as a Excel spreadsheet with a different DE gene list on each Excel sheet.

de_list <- list(
  skin_de, lung_de, liver_de, lymph_de, pe_de, cns_de, ascite_de, fibro_de,
  uvea_de, sinonasal_de)

names(de_list) <- c(
  "skin_vs_primary_degs", "lung_vs_primary_degs", "liver_vs_primary_degs",
  "lymph_node_vs_primary_degs", "pleural_effusion_vs_primary_degs",
  "cns_vs_primary_degs", "ascite_vs_primary_degs", "fibroblast_vs_primary_degs",
  "uvea_vs_primary_degs", "sinonasal_vs_primary_degs")
writexl::write_xlsx(x = de_list,
                    path = paste0("./data/depmap_melanoma_limma_",
                                  Sys.Date(), ".xlsx"))

Session Info

sessionInfo()
#> R version 4.5.0 (2025-04-11)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.12.0 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Europe/Brussels
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#>  [1] DT_0.33            writexl_1.5.4      readr_2.1.5        readxl_1.4.5      
#>  [5] pheatmap_1.0.12    limma_3.65.1       ggrepel_0.9.6.9999 ggplot2_3.5.2     
#>  [9] tibble_3.2.1       tidyr_1.3.1        dplyr_1.1.4       
#> 
#> loaded via a namespace (and not attached):
#>  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.0     tidyselect_1.2.1  
#>  [5] Rcpp_1.0.14        dichromat_2.0-0.1  jquerylib_0.1.4    scales_1.4.0      
#>  [9] statmod_1.5.0      yaml_2.3.10        fastmap_1.2.0      R6_2.6.1          
#> [13] labeling_0.4.3     generics_0.1.4     knitr_1.50         htmlwidgets_1.6.4 
#> [17] tzdb_0.5.0         bslib_0.9.0        pillar_1.10.2      RColorBrewer_1.1-3
#> [21] rlang_1.1.6        cachem_1.1.0       xfun_0.52          sass_0.4.10       
#> [25] cli_3.6.5          withr_3.0.2        magrittr_2.0.3     crosstalk_1.2.1   
#> [29] digest_0.6.37      grid_4.5.0         rstudioapi_0.17.1  hms_1.1.3         
#> [33] lifecycle_1.0.4    vctrs_0.6.5        evaluate_1.0.3     glue_1.8.0        
#> [37] cellranger_1.1.0   farver_2.1.2       rmarkdown_2.29     purrr_1.0.4       
#> [41] tools_4.5.0        pkgconfig_2.0.3    htmltools_0.5.8.1